Data Wrangling with Dynamic Attributes


In [8]:
from urllib.request import urlopen
import warnings
import os
import json

URL = 'http://www.oreilly.com/pub/sc/osconfeed'
JSON = 'data/osconfeed.json'

def load():
    if not os.path.exists(JSON):
        msg = 'downloading {} to {}'.format(URL, JSON)
        warnings.warn(msg)
        with urlopen(URL) as remote, open(JSON, 'wb') as local:
            local.write(remote.read())
            
    with open(JSON) as fp:
        return json.load(fp)

In [9]:
feed = load()

In [4]:
sorted(feed['Schedule'].keys())


Out[4]:
['conferences', 'events', 'speakers', 'venues']

In [5]:
for key, value in sorted(feed['Schedule'].items()):
    print('{:3} {}'.format(len(value), key))


  1 conferences
494 events
357 speakers
 53 venues

In [ ]:
feed['Schedule']['speakers'][-1]['name']

In [ ]:
feed['Schedule']['speakers'][-1]['serial']

In [ ]:
feed['Schedule']['events'][40]['name']

In [ ]:
feed['Schedule']['events'][40]['speakers']

Exploring JSON-Like Data with Dynamic Attributes


In [7]:
from collections import abc

class FrozenJSON:
    """A read-only facade for navigating a JSON-like object
    using attribute notation"""
    
    def __init__(self, mapping):
        self.__data = dict(mapping)                    #1
        
    def __getattr__(self, name):                       #2
        if hasattr(self.__data, name):
            return getattr(self.__data, name)          #3
        else:
            return FrozenJSON.build(self.__data[name]) #4
        
    @classmethod
    def build(cls, obj):                               #5
        if isinstance(obj, abc.Mapping):               #6
            return cls(obj)
        elif isinstance(obj, abc.MutableSequence):     #7
            return [cls.build(item) for item in obj]
        else:                                          #8
            return obj

In [8]:
from osconfeed import load
raw_feed = load()
feed = FrozenJSON(raw_feed)

In [9]:
raw_feed = load()

In [10]:
feed = FrozenJSON(raw_feed)

In [11]:
len(feed.Schedule.speakers)


Out[11]:
357

In [12]:
sorted(feed.Schedule.keys())


Out[12]:
['conferences', 'events', 'speakers', 'venues']

In [13]:
for key, value in sorted(feed.Schedule.items()):
    print('{:3} {}'.format(len(value), key))


  1 conferences
494 events
357 speakers
 53 venues

In [14]:
feed.Schedule.speakers[-1].name


Out[14]:
'Carina C. Zona'

In [15]:
talk = feed.Schedule.events[40]

In [16]:
type(talk)


Out[16]:
__main__.FrozenJSON

In [17]:
talk.name


Out[17]:
'There *Will* Be Bugs'

In [18]:
talk.speakers


Out[18]:
[3471, 5199]

In [19]:
talk.flavor


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-19-0ce41df2377c> in <module>()
----> 1 talk.flavor

<ipython-input-7-70aa8d36dce3> in __getattr__(self, name)
     12             return getattr(self.__data, name)          #3
     13         else:
---> 14             return FrozenJSON.build(self.__data[name]) #4
     15 
     16     @classmethod

KeyError: 'flavor'

The Invalid Attribute Name Problem


In [20]:
grad = FrozenJSON({'name': 'Jim Bo', 'class': 1982})

In [21]:
grad.class


  File "<ipython-input-21-ce4327ea3f6c>", line 1
    grad.class
             ^
SyntaxError: invalid syntax

In [22]:
getattr(grad,'class')


Out[22]:
1982

In [24]:
from collections import abc
import keyword

class FrozenJSON:
    """A read-only facade for navigating a JSON-like object
    using attribute notation"""
    
    def __init__(self, mapping):
        self.__data = {}
        for key, value in mapping.items():
            if keyword.iskeyword(key):
                key += '_'
            self.__data[key] = value
        
    def __getattr__(self, name):
        if hasattr(self.__data, name):
            return getattr(self.__data, name)
        else:
            return FrozenJSON.build(self.__data[name])
        
    @classmethod
    def build(cls, obj):
        if isinstance(obj, abc.Mapping):
            return cls(obj)
        elif isinstance(obj, abc.MutableSequence):
            return [cls.build(item) for item in obj]
        else:
            return obj

In [27]:
grad = FrozenJSON({'name': 'Jim Bo', 'class': 1982})
grad.class_


Out[27]:
1982

In [28]:
x = FrozenJSON({'2be': 'or not'})
x.2be


  File "<ipython-input-28-302340948057>", line 2
    x.2be
      ^
SyntaxError: invalid syntax

Flexible Object Creation with new


In [1]:
from collections import abc

class FrozenJSON:
    """A read-only facade for navigating a JSON-like object
    using attribute notation"""

    def __new__(cls, arg):                                  #1
        if isinstance(arg, abc.Mapping):
            return super().__new__(cls)                     #2
        elif isinstance(arg, abc.MutableSequence):          #3
            return [cls(item) for item in arg]
        else:
            return arg
        
    def __init__(self, mapping):
        self.__data = {}
        for key, value in mapping.items():
            if keyword.iskeyword(key):
                key += '_'
            self.__data[key] = value
        
    def __getattr__(self, name):
        if hasattr(self.__data, name):
            return getattr(self.__data, name)
        else:
            return FrozenJSON(self.__data[name])      #4

Restructuring the OSCON Feed with shelve


In [7]:
import warnings

import osconfeed

DB_NAME = 'data/schedule1_db'
CONFERENCE = 'conference.115'

class Record:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
        
def load_db(db):
    raw_data = osconfeed.load()
    warnings.warn('loading' + DB_NAME)
    for collection, rec_list in raw_data['Schedule'].items():
        record_type = collection[:-1]
        for record in rec_list:
            key = '{}.{}'.format(record_type, record['serial'])
            record['serial'] = key
            db[key] = Record(**record)

In [8]:
import shelve
db = shelve.open(DB_NAME)

In [11]:
if CONFERENCE not in db:
    load_db(db)


C:\Users\langestrst01\AppData\Local\Continuum\Anaconda3\envs\fluentPy\lib\site-packages\ipykernel\__main__.py:14: UserWarning: loadingdata/schedule1_db

In [12]:
speaker = db['speaker.3471']

In [13]:
type(speaker)


Out[13]:
__main__.Record

In [14]:
speaker.name, speaker.twitter


Out[14]:
('Anna Martelli Ravenscroft', 'annaraven')

In [15]:
db.close()

Linked Record Retrieval with Properties


In [13]:
import warnings
import inspect

import osconfeed

DB_NAME = 'data/schedule2_db'
CONFERENCE = 'conference.115'

class Record:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
    
    def __eq__(self, other):
        if isinstance(other, Record):
            return self.__dict__ == other.__dict__
        else:
            return NotImplemented

class MissingDatabaseError(RuntimeError):
    """Raised when a database is required but was not set."""
    
class DbRecord(Record):
    
    __db = None
    
    @staticmethod
    def set_db(db):
        DbRecord.__db = db
        
    @staticmethod
    def get_db():
        return DbRecord.__db
    
    @classmethod
    def fetch(cls, ident):
        db = cls.get_db()
        try:
            return db[ident]
        except TypeError:
            if db is None:
                msg = "database not set; call '{}.set_db(mydb)'"
                raise MissingDatabaseError(msg.format(cls.__name__))
            else:
                raise
                
    def __repr__(self):
        if hasattr(self, 'serial'):
            cls_name = self.__class__.__name__
            return '<{} serial={!r}>'.format(cls_name, self.serial)
        else:
            return super().__repr__()
        
class Event(DbRecord):
    
    @property
    def venue(self):
        key = 'venue.{}'.format(self.venue_serial)
        return self.__class__.fetch(key)
    
    @property
    def speakers(self):
        if not hasattr(self, '_speaker_objs'):
            spkr_serials = self.__dict__['speakers']
            fetch = self.__class__.fetch
            self._speaker_objs = [fetch('speaker.{}'.format(key))
                                 for key in spkr_serials]
        return self._speaker_objs
    
    def __repr__(self):
        if hasattr(self, 'name'):
            cls_name = self.__class__.__name__
            return '<{} {!r}>'.format(cls_name, self.name)
        else:
            return super().__repr__()

def load_db(db):
    raw_data = osconfeed.load()
    warnings.warn('loading ' + DB_NAME)
    for collection, rec_list in raw_data['Schedule'].items():
        record_type = collection[:-1]
        cls_name = record_type.capitalize()
        cls = globals().get(cls_name, DbRecord)
        if inspect.isclass(cls) and issubclass(cls, DbRecord):
            factory = cls
        else:
            factory = DbRecord
        for record in rec_list:
            key = '{}.{}'.format(record_type, record['serial'])
            record['serial'] = key
            db[key] = factory(**record)

In [15]:
import shelve
db = shelve.open(DB_NAME)
if CONFERENCE not in db:
    load_db(db)

In [16]:
DbRecord.set_db(db)

In [17]:
event = DbRecord.fetch('event.33950')

In [18]:
event


Out[18]:
<Event 'There *Will* Be Bugs'>

In [19]:
event.venue


Out[19]:
<DbRecord serial='venue.1449'>

In [20]:
event.venue.name


Out[20]:
'Portland 251'

In [21]:
for spkr in event.speakers:
    print('{0.serial}: {0.name}'.format(spkr))


speaker.3471: Anna Martelli Ravenscroft
speaker.5199: Alex Martelli

In [22]:
event.speakers


Out[22]:
[<DbRecord serial='speaker.3471'>, <DbRecord serial='speaker.5199'>]

In [23]:
db.close()

Using a Property for Attribute Validation

LineItem Take #1: Class for an Item in an Order


In [24]:
class LineItem:
    
    def __init__(self, description, weight, price):
        self.description = description
        self.weight = weight
        self.price = price
        
    def subtotal(self):
        return self.weight * self.price

In [25]:
raisins = LineItem('Golden raisins', 10, 6.95)
raisins.subtotal()


Out[25]:
69.5

In [26]:
raisins.weight = -20
raisins.subtotal()


Out[26]:
-139.0

LineItem Take #2: A Validating Property


In [27]:
class LineItem:
    
    def __init__(self, description, weight, price):
        self.description = description
        self.weight = weight
        self.price = price
        
    def subtotal(self):
        return self.weight * self.price
    
    @property
    def weight(self):
        return self.__weight
    
    @weight.setter
    def weight(self, value):
        if value > 0:
            self.__weight = value
        else:
            raise ValueError('value must be > 0')

In [28]:
walnuts = LineItem('walnuts', 0, 10.00)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-259d2355c23c> in <module>()
----> 1 walnuts = LineItem('walnuts', 0, 10.00)

<ipython-input-27-046c3a305000> in __init__(self, description, weight, price)
      3     def __init__(self, description, weight, price):
      4         self.description = description
----> 5         self.weight = weight
      6         self.price = price
      7 

<ipython-input-27-046c3a305000> in weight(self, value)
     18             self.__weight = value
     19         else:
---> 20             raise ValueError('value must be > 0')

ValueError: value must be > 0

A Proper Look at Properties


In [29]:
class LineItem:
    
    def __init__(self, description, weight, price):
        self.description = description
        self.weight = weight
        self.price = price
        
    def subtotal(self):
        return self.weight * self.price
    
    def get_weight(self):
        return self.__weight
    
    def set_weight(self):
        if value > 0:
            self.__weight = value
        else:
            raise ValueError('value must be > 0')
            
    weight = property(get_weight, set_weight)

Properties Override Instance Attributes


In [30]:
class Class:
    data = 'the class data attr'
    @property
    def prop(self):
        return 'the prop value'

In [31]:
obj = Class()
vars(obj)


Out[31]:
{}

In [32]:
obj.data


Out[32]:
'the class data attr'

In [33]:
obj.data = 'bar'

In [34]:
vars(obj)


Out[34]:
{'data': 'bar'}

In [35]:
obj.data


Out[35]:
'bar'

In [36]:
Class.data


Out[36]:
'the class data attr'

In [37]:
Class.prop


Out[37]:
<property at 0x8e3364c9f8>

In [38]:
obj.prop


Out[38]:
'the prop value'

In [39]:
obj.prop = 'foo'


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-39-28151f4d3513> in <module>()
----> 1 obj.prop = 'foo'

AttributeError: can't set attribute

In [40]:
obj.__dict__['prop'] = 'foo'

In [41]:
vars(obj)


Out[41]:
{'data': 'bar', 'prop': 'foo'}

In [42]:
obj.prop


Out[42]:
'the prop value'

In [43]:
Class.prop = 'baz'

In [44]:
obj.prop


Out[44]:
'foo'

In [45]:
obj.data


Out[45]:
'bar'

In [46]:
Class.data


Out[46]:
'the class data attr'

In [47]:
Class.data = property(lambda self: 'the "data" prop value')

In [48]:
obj.data


Out[48]:
'the "data" prop value'

In [49]:
del Class.data

In [50]:
obj.data


Out[50]:
'bar'

Property Documentation


In [54]:
class Foo:
    
    @property
    def bar(self):
        """The bar attribute"""
        return self.__dict__['bar']
    
    @property
    def bar(self, value):
        self.__dict__['bar'] = value

In [55]:
help(Foo.bar)


Help on property:



In [56]:
help(Foo)


Help on class Foo in module __main__:

class Foo(builtins.object)
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  bar

Coding a Property Factory


In [58]:
def quantity(storage_name):
    
    def qty_getter(instance):
        return instance.__dict__[storage_name]
    
    def qty_setter(instance, value):
        if value > 0 :
            instance.__dict__[storage_name] = value
        else:
            return ValueError('value must be > 0')
        
    return property(qty_getter, qty_setter)

In [59]:
class LineItem:
    weight = quantity('weight')
    price = quantity('price')
    
    def __init__(self, description, weight, price):
        self.description = description
        self.weight = weight
        self.price = price
        
    def subtotal(self):
        return self.weight * self.price

In [60]:
nutmeg = LineItem('Moluccan nutmeg', 8, 13.95)
nutmeg.weight, nutmeg.price


Out[60]:
(8, 13.95)

In [63]:
sorted(vars(nutmeg).items())


Out[63]:
[('description', 'Moluccan nutmeg'), ('price', 13.95), ('weight', 8)]

Handling Attribute Deletion


In [ ]: